* ---- Session setup ----
* Start from an empty session, turn off output paging, and set the print
* color mode (stored permanently). Working directory and the two path
* globals are intentionally blank and must be filled in before running.
clear all
set more off
set printcolor gs1, permanently
cd ""
global dirobs ""
global dirout ""


* ---- Mexico City weekly totals ----
* Sum fares and trips (electronic and cash) by week_date, convert week_date to
* the Monday that starts its week, and keep total trips, total fares and cash trips.
use "CDMEX_noEDOMEX_weekly.dta", clear
collapse (sum) epaid_fares_usd cash_fares_usd epaid_trips cash_trips, by(week_date) fast

* week_date is a "-"-separated string; the three pieces are used as year, month, day.
split week_date, parse("-")
destring week_date*, replace
rename (week_date1 week_date2 week_date3) (year month day)
gen all_dates = mdy(month, day, year)

* Days elapsed since Monday: 0 for Monday through 6 for Sunday.
gen dow = mod(dow(all_dates) + 6, 7)
gen week = all_dates - dow

egen num_trips = rowtotal(epaid_trips cash_trips)
egen sum_fare = rowtotal(epaid_fares_usd cash_fares_usd)
rename cash_trips num_trips_cash
keep week num_trips sum_fare num_trips_cash
gen city = "Mexico City"
save df_weekly_extended.dta, replace

* ---- Estado de Mexico weekly totals ----
* Same construction as the Mexico City weekly file: sum by week_date, map the
* date to the Monday of its week, and keep total trips, total fares and cash trips.
use "EDOMEX.dta", clear
collapse (sum) epaid_fares_usd cash_fares_usd epaid_trips cash_trips, by(week_date) fast

* week_date is a "-"-separated string; the three pieces are used as year, month, day.
split week_date, parse("-")
destring week_date*, replace
rename (week_date1 week_date2 week_date3) (year month day)
gen all_dates = mdy(month, day, year)

* Days elapsed since Monday: 0 for Monday through 6 for Sunday.
gen dow = mod(dow(all_dates) + 6, 7)
gen week = all_dates - dow

egen num_trips = rowtotal(epaid_trips cash_trips)
egen sum_fare = rowtotal(epaid_fares_usd cash_fares_usd)
rename cash_trips num_trips_cash
keep week num_trips sum_fare num_trips_cash
gen city = "Estado de Mexico"
save edo_mex_weekly_extended.dta, replace

* ---- Estado de Mexico share of the combined weekly totals ----
* Stack the Mexico City and Estado de Mexico weekly files and compute, for each
* week, Estado de Mexico's share of cash trips, total trips and total fares.
* Fix: the -use- command takes the option -clear-; -replace- is not an allowed
* option and stopped the script at this line.
use df_weekly_extended.dta, clear
append using edo_mex_weekly_extended.dta

* Cash trips on the Mexico City rows are set to zero before the weekly totals
* are formed, so the cash-trip share of Estado de Mexico is computed against
* its own cash trips only.
replace num_trips_cash=0 if city=="Mexico City"
foreach var in num_trips_cash num_trips sum_fare {
	bysort week: egen tot_`var'=total(`var')
	gen share_`var'=`var'/tot_`var'
}

* One row per week with the three share_ variables.
keep if city=="Estado de Mexico"
keep week share*
save edo_mex_shares.dta, replace

* ---- Daily Estado de Mexico series ----
* Take the daily Mexico City rows of obs_data2.csv, multiply cash trips, trips
* and fares by the weekly shares saved in edo_mex_shares.dta, and store the
* result (relabelled as Estado de Mexico) back into edo_mex_shares.dta.
import delimited "obs_data2.csv", encoding(ISO-8859-1) clear
keep if city_name=="Mexico City"

* Measures split by user type; each is missing on rows of other user types.
foreach m in num_trips sum_fare sum_miles num_violent_trips {
	gen `m'_cash = `m' if user_type=="pure_cash"
}
foreach m in num_trips sum_fare sum_miles num_violent_trips {
	gen `m'_mixed = `m' if user_type=="mixed_user"
}

* datestr is a "-"-separated string; the three pieces are used as year, month, day.
split datestr, parse("-")
destring datestr*, replace
rename (datestr1 datestr2 datestr3) (year month day)
gen all_dates = mdy(month, day, year)

* Monday of the week containing all_dates (merge key for the weekly shares).
gen week = all_dates - mod(dow(all_dates) + 6, 7)

collapse (sum) num_trips sum_fare sum_miles num_violent_trips *_cash *_mixed, by(city_name all_dates week year month)
merge m:1 week using edo_mex_shares.dta, keep(match) nogenerate

foreach m in num_trips_cash num_trips sum_fare {
	gen `m'_edomex = `m'*share_`m'
}
keep city_name year month all_dates num_trips_cash_edomex num_trips_edomex sum_fare_edomex
replace city_name = "Estado de Mexico"
rename (num_trips_cash_edomex num_trips_edomex sum_fare_edomex) (num_trips_cash num_trips sum_fare)

* Overwrites the weekly share file with the daily imputed series.
save edo_mex_shares.dta, replace


* ---- City-year population (2017-2018), Estado de Mexico pooled ----
* Link launch-date cities to municipalities, attach municipal population and
* sum it by city and year. Municipalities whose code starts with 15 are
* relabelled "Estado de Mexico" unless their city is Toluca.
use "launch_dates.dta", clear
joinby id using "ZM_Municipio_2015.dta", unmatched(both)
replace city = "Queretaro" if ciudad=="Querétaro"

* Rows relabelled Queretaro are treated as matched so they survive the next drop.
replace _merge = 3 if city=="Queretaro"
drop if _merge==2
drop _merge

replace municipio = municipio2 if municipio==.
drop if ciudad=="Puerto Vallarta" & municipio==18020
drop municipio2
rename entidad entidad_id
joinby municipio using "population_all_mun.dta", unmatched(master)

* Zero-padded 5-digit municipality code; its first two digits are stored as state.
gen str5 municipio2 = string(municipio, "%05.0f")
gen state = substr(municipio2, 1, 2)
replace city = "Estado de Mexico" if state=="15" & city!="Toluca"

collapse (sum) population, by(year city)
replace city = "Mexico City" if city=="Mexico City (Metropolitan area)"
keep if inlist(year, 2017, 2018)
duplicates drop
save population_temp.dta, replace


* ---- Daily city panel for the trips synthetic control (SC_Puebla.dta) ----
* Aggregate obs_data2.csv to city-by-day totals, append the imputed Estado de
* Mexico series, restrict to Aug 2017 - Mar 2018, keep only cities observed on
* the maximum number of days, and add outcomes normalised to their value on
* date 21093.
import delimited "obs_data2.csv", encoding(ISO-8859-1) clear

* Fix: the exclusions previously referred to the column as -city-, which only
* resolves through variable-name abbreviation of city_name. That fails when
* abbreviation is switched off or when another variable starting with "city"
* is present, so the full name is used here.
drop if inlist(city_name, "Veracruz", "Riviera Maya", "Pachuca", "Durango", "Colima")

* Measures split by user type; each is missing on rows of other user types.
gen num_trips_cash=num_trips if user_type=="pure_cash"
gen sum_fare_cash=sum_fare if user_type=="pure_cash"
gen sum_miles_cash=sum_miles if user_type=="pure_cash"
gen num_violent_trips_cash=num_violent_trips if user_type=="pure_cash"
gen num_trips_mixed=num_trips if user_type=="mixed_user"
gen sum_fare_mixed=sum_fare if user_type=="mixed_user"
gen sum_miles_mixed=sum_miles if user_type=="mixed_user"
gen num_violent_trips_mixed=num_violent_trips if user_type=="mixed_user"

* datestr is a "-"-separated string; the three pieces are used as year, month, day.
split datestr, p("-")
destring datestr*, replace
rename datestr1 year
rename datestr2 month
rename datestr3 day
gen all_dates=mdy(month, day, year)

collapse (sum) num_trips sum_fare sum_miles num_violent_trips *_cash *_mixed, by(city_name all_dates year month)

* Adds the daily Estado de Mexico rows (cash trips, trips and fares only).
append using edo_mex_shares.dta

* Numeric panel id assigned by egen group() over city_name.
egen city_id=group(city_name)
format all_dates %td
tsset city_id all_dates

drop if sum_fare==0
keep if (month>=8 & year==2017) | (month<=3 & year==2018)

* Keep only cities that have the largest number of remaining daily rows.
bysort city_id: gen count= _N
egen max_count=max(count)
keep if count==max_count
gen date=all_dates
order city_name city_id all_dates date

* Each outcome divided by the same city's value on date 21093 (01oct2017).
foreach var in num_trips num_trips_cash sum_fare {
	gen `var'_temp=`var' if all_dates==21093
	bysort city_id: egen `var'_max=max(`var'_temp)
	gen `var'_norm=`var'/`var'_max
	drop `var'_temp `var'_max
}
save SC_Puebla.dta, replace



* ---- Figure 8a: synthetic control for trips per capita ----
* Reload the daily panel, attach city-year population, run synth with unit 31
* as treated and treatment period 21161, then plot treated vs synthetic.
use SC_Puebla.dta, clear
* Hard-coded id from egen group(city_name). NOTE(review): confirm which city is 34.
drop if city_id==34
rename city_name city
merge m:1 year city using population_temp.dta, nogenerate keep(match)
capture erase sc_Puebla_trips.dta

* sum_fare becomes fare per trip; it is computed before num_trips is rescaled.
* Trip counts become per-capita values.
replace sum_fare=sum_fare/num_trips
replace num_trips=num_trips/population
replace num_trips_cash=num_trips_cash/population

* Outcome: num_trips. Predictors: num_trips_cash, sum_fare, and num_trips on
* dates 21063 and 21046. Results are written to sc_Puebla_trips.dta.
synth num_trips num_trips_cash sum_fare ///
 num_trips(21063) num_trips(21046)   ///
, trunit(31) trperiod(21161) xperiod(21032(1)21274) nested fig keep(sc_Puebla_trips)

use sc_Puebla_trips.dta, clear
rename _W_Weight weight
rename _Y_treated Puebla
rename _Y_synthetic Synthetic
rename _time time
format time %td

* Rescale to trips per 1000 persons, matching the y-axis title below.
replace Puebla=Puebla*1000
replace Synthetic=Synthetic*1000


* Vertical reference lines are drawn at 20877, 21161, 21077 and 21115.
* The tlabel list contains 15feb2018 twice.
#delimit
twoway 
(line Puebla time, lwidth(thick) lcolor(cranberry)) 
(line Synthetic time, lcolor(black) lpattern(longdash)) 
, ytitle("Number of trips (per 1000 persons)", height(5)) 
 xtitle(" ") ylabel(5[5]30)
title(" ") xline(20877, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(15aug2017  15oct2017  15dec2017  15feb2018 15feb2018, format(%d)) 
xline(21161, lwidth(thin) lcolor(black) lpattern(dash))
xline(21077, lwidth(thin) lcolor(red) lpattern(dash))
xline(21115, lwidth(thin) lcolor(ebblue) lpattern(dash))
legend(order (1 "Puebla" 2 "Synthetic Puebla") cols(2) region(lwidth(none)))
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/Figure8a.eps", replace

* Gap between treated and synthetic, relative to the treated unit's mean over
* dates before 21161. These variables are not saved; the next block reloads data.
gen difference = Puebla - Synthetic
gen temp=Puebla
replace temp=. if time>=21161
egen mean=mean(temp) 
gen pct_difference=difference/mean

* ---- Figure D1b: same trips synthetic control with treatment period 21115 ----
* Identical to the Figure 8a run except for trperiod() and the y-axis range.
use SC_Puebla.dta, clear
* Hard-coded id from egen group(city_name). NOTE(review): confirm which city is 34.
drop if city_id==34
* Same date window that was already applied when SC_Puebla.dta was built.
keep if (month>=8 & year==2017) | (month<=3 & year==2018)
rename city_name city
merge m:1 year city using population_temp.dta, nogenerate keep(match)
capture erase sc_Puebla_trips.dta

* Fare per trip first, then per-capita trip counts.
replace sum_fare=sum_fare/num_trips
replace num_trips=num_trips/population
replace num_trips_cash=num_trips_cash/population

* Outcome: num_trips. Treated unit 31, treatment period 21115.
synth num_trips num_trips_cash sum_fare ///
 num_trips(21063) num_trips(21046)   ///
, trunit(31) trperiod(21115) xperiod(21032(1)21274) nested fig keep(sc_Puebla_trips)

use sc_Puebla_trips.dta, clear
rename _W_Weight weight
rename _Y_treated Puebla
rename _Y_synthetic Synthetic
rename _time time
format time %td

* Rescale to trips per 1000 persons, matching the y-axis title below.
replace Puebla=Puebla*1000
replace Synthetic=Synthetic*1000

* Vertical reference lines are drawn at 20877, 21161, 21077 and 21115.
#delimit
twoway 
(line Puebla time, lwidth(thick) lcolor(cranberry)) 
(line Synthetic time, lcolor(black) lpattern(longdash)) 
, ytitle("Number of trips (per 1000 persons)", height(5)) 
 xtitle(" ") ylabel(0[5]30)
title(" ") xline(20877, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(15aug2017  15oct2017  15dec2017  15feb2018 15feb2018, format(%d)) 
xline(21161, lwidth(thin) lcolor(black) lpattern(dash))
xline(21077, lwidth(thin) lcolor(red) lpattern(dash))
xline(21115, lwidth(thin) lcolor(ebblue) lpattern(dash))
legend(order (1 "Puebla" 2 "Synthetic Puebla") cols(2) region(lwidth(none)))
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/FigureD1b.eps", replace

* ---- Figure D1a: same trips synthetic control with treatment period 21077 ----
* Identical to the Figure 8a run except for trperiod() and the y-axis range.
use SC_Puebla.dta, clear
* Hard-coded id from egen group(city_name). NOTE(review): confirm which city is 34.
drop if city_id==34
* Same date window that was already applied when SC_Puebla.dta was built.
keep if (month>=8 & year==2017) | (month<=3 & year==2018)
rename city_name city
merge m:1 year city using population_temp.dta, nogenerate keep(match)
capture erase sc_Puebla_trips.dta

* Fare per trip first, then per-capita trip counts.
replace sum_fare=sum_fare/num_trips
replace num_trips=num_trips/population
replace num_trips_cash=num_trips_cash/population

* Outcome: num_trips. Treated unit 31, treatment period 21077.
synth num_trips num_trips_cash sum_fare ///
 num_trips(21063) num_trips(21046)   ///
, trunit(31) trperiod(21077) xperiod(21032(1)21274) nested fig keep(sc_Puebla_trips)

use sc_Puebla_trips.dta, clear
rename _W_Weight weight
rename _Y_treated Puebla
rename _Y_synthetic Synthetic
rename _time time
format time %td

* Rescale to trips per 1000 persons, matching the y-axis title below.
replace Puebla=Puebla*1000
replace Synthetic=Synthetic*1000


* Vertical reference lines are drawn at 20877, 21161, 21077 and 21115.
#delimit
twoway 
(line Puebla time, lwidth(thick) lcolor(cranberry)) 
(line Synthetic time, lcolor(black) lpattern(longdash)) 
, ytitle("Number of trips (per 1000 persons)", height(5)) 
 xtitle(" ") ylabel(0[5]30)
title(" ") xline(20877, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(15aug2017  15oct2017  15dec2017  15feb2018 15feb2018, format(%d)) 
xline(21161, lwidth(thin) lcolor(black) lpattern(dash))
xline(21077, lwidth(thin) lcolor(red) lpattern(dash))
xline(21115, lwidth(thin) lcolor(ebblue) lpattern(dash))
legend(order (1 "Puebla" 2 "Synthetic Puebla") cols(2) region(lwidth(none)))
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/FigureD1a.eps", replace

 
* ---- Placebo synthetic controls and confidence bounds, trips outcome ----
* Runs synth once per city (each city in turn as the treated unit), stacks the
* treated-outcome paths and donor weights column by column, and passes them to
* the external SCMCS routine to obtain upper and lower bounds.
use SC_Puebla.dta, clear
rename city_name city
merge m:1 year city using population_temp.dta, nogenerate keep(match)
capture erase sc_Puebla_trips.dta

* Fare per trip first, then per-capita trip counts.
replace sum_fare=sum_fare/num_trips
replace num_trips=num_trips/population
replace num_trips_cash=num_trips_cash/population
tsset city_id all_dates
* Hard-coded id from egen group(city_name). NOTE(review): confirm which city is 34.
drop if city_id==34

* Remove result files left over from a previous run.
forvalues i=1/50 {
	capture erase sc_`i'_trips.dta
}

* Fix: the accumulators below are grown with nullmat(), so any Ymat or
* weightsmat still in memory from an earlier run would be silently extended.
* The other placebo sections start with -clear all-; this one did not reset them.
capture matrix drop Ymat
capture matrix drop weightsmat

levelsof city_id, local(cities)
foreach i of local cities {
	synth num_trips num_trips_cash sum_fare num_trips(21105) ///
		num_trips(21033) num_trips(21063)  num_trips(21093)  num_trips(21124) num_trips(21154) ///
		, trunit(`i') trperiod(21161) xperiod(21032(1)21274) nested fig keep(sc_`i'_trips)
	* Append this run's treated path as a new column of Ymat.
	matrix temp1 = e(Y_treated)
	matrix Ymat = (nullmat(Ymat), temp1)
	* Second column of e(W_weights) is appended as a new column of weightsmat.
	matrix temp1 = e(W_weights)
	matrix temp2 = temp1[1...,2]
	matrix weightsmat = (nullmat(weightsmat), temp2)
}

* Row vector of zeros with one entry per placebo run; passed to SCMCS below.
matrix v = J(1, colsof(Ymat), 0)
local dim (`= rowsof(Ymat)',`=colsof(Ymat)')
di "`dim'"
local dim (`= rowsof(v)',`=colsof(v)')
di "`dim'"

* SCMCS is defined in the external do-file, which is re-run on every pass.
* NOTE(review): the synth runs in this script treat unit 31 as Puebla while the
* first numeric argument here is 30; confirm against the SCMCS definition
* whether that argument is meant to be the treated unit's column.
forvalues j=1/3 {
	do "function_SCM-CS_v07_stata.do"
	SCMCS Ymat weightsmat 30 130 0 v 30 "constant"  `j'/26
	svmat results
	keep results1 results2
	rename results1 upper_bound`j'
	rename results2 lower_bound`j'
	* Row counter used later as the merge key with the synth output file.
	gen n=_n
	save SC_bounds`j'.dta, replace
}



* ---- Figure 8b: percent gap in trips with SCMCS bounds ----
* Uses the placebo-loop output for unit 31 and the three bound files, expresses
* the gap and the bounds relative to the treated unit's mean before 21161.
use sc_31_trips, clear
* Row counter; merge key for the bound files.
gen n=_n
* Counts of rows before and from date 21161 (not used later in this block).
gen T0_temp=1 if _time<21161
egen T0=sum(T0_temp)
gen T_temp=1 if _time>=21161
egen T=sum(T_temp)

rename _Y_synthetic Synthetic
rename _Y_treated Puebla
rename _W_Weight weight
rename _Co_Number co
* The next two statements leave the values unchanged.
replace Puebla=Puebla
replace Synthetic=Synthetic
gen diff=Puebla-Synthetic
gen temp_mean=Puebla if _time<21161
egen mean=mean(temp_mean) 
drop temp_mean
gen pct_difference=diff/mean
rename _time time
format time %td
merge 1:1 n using SC_bounds1.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds2.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds3.dta, keep(match) nogenerate

* mean and pct_difference are dropped and rebuilt with the same definitions.
capture drop mean
capture drop difference
capture drop pct_difference
capture drop temp
gen difference = Puebla - Synthetic
gen temp=Puebla
replace temp=. if time>=21161
egen mean=mean(temp) 
gen pct_difference=difference/mean

* Bounds are put on the same relative scale as pct_difference.
foreach var in upper_bound1 lower_bound1 upper_bound2 lower_bound2 upper_bound3 lower_bound3 {
replace `var'=`var'/mean
}

 
* Only the first pair of bounds is plotted.
#delimit
twoway 
(line upper_bound1 time, lcolor(gs4)  lpattern(dash)) 
(line lower_bound1 time, lcolor(gs4)  lpattern(dash)) 
(line pct_difference time, lwidth(thick) lcolor(cranberry)) 
, ytitle("Pct Difference Number of trips", height(5)) 
 xtitle(" ") ylabel(-.75[.25].5)
title(" ") xline(20877, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(15aug2017  15oct2017  15dec2017  15feb2018 15feb2018, format(%d)) 
xline(21161, lwidth(thin) lcolor(black) lpattern(dash))
xline(21077, lwidth(thin) lcolor(red) lpattern(dash))
xline(21115, lwidth(thin) lcolor(ebblue) lpattern(dash))
legend(off)
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/Figure8b.eps", replace


* ---- City-year population (2017-2018), no Estado de Mexico pooling ----
* Rebuilds population_temp.dta from the launch-date and municipality files,
* this time without relabelling any municipality as Estado de Mexico.
clear all
use "launch_dates.dta", clear
joinby id using "ZM_Municipio_2015.dta", unmatched(both)
replace city = "Queretaro" if ciudad=="Querétaro"

* Rows relabelled Queretaro are treated as matched so they survive the next drop.
replace _merge = 3 if city=="Queretaro"
drop if _merge==2
drop _merge

replace municipio = municipio2 if municipio==.
drop if ciudad=="Puerto Vallarta" & municipio==18020
drop municipio2
rename entidad entidad_id
joinby municipio using "population_all_mun.dta", unmatched(master)

collapse (sum) population, by(year city)
replace city = "Mexico City" if city=="Mexico City (Metropolitan area)"
keep if inlist(year, 2017, 2018)
duplicates drop
save population_temp.dta, replace

* ---- Daily city panel for the price synthetic control (SC_Puebla_price.dta) ----
* Aggregate obs_data2.csv to city-by-day totals (no Estado de Mexico rows are
* appended here), restrict to Aug 2017 - Mar 2018, keep only cities observed on
* the maximum number of days, define price as fares per mile, and add outcomes
* normalised to their value on date 21093.
import delimited "obs_data2.csv", encoding(ISO-8859-1) clear

* Fix: the exclusions previously referred to the column as -city-, which only
* resolves through variable-name abbreviation of city_name. That fails when
* abbreviation is switched off or when another variable starting with "city"
* is present, so the full name is used here.
drop if inlist(city_name, "Veracruz", "Riviera Maya", "Pachuca", "Durango", "Colima")

* Measures split by user type; each is missing on rows of other user types.
gen num_trips_cash=num_trips if user_type=="pure_cash"
gen sum_fare_cash=sum_fare if user_type=="pure_cash"
gen sum_miles_cash=sum_miles if user_type=="pure_cash"
gen num_violent_trips_cash=num_violent_trips if user_type=="pure_cash"
gen num_trips_mixed=num_trips if user_type=="mixed_user"
gen sum_fare_mixed=sum_fare if user_type=="mixed_user"
gen sum_miles_mixed=sum_miles if user_type=="mixed_user"
gen num_violent_trips_mixed=num_violent_trips if user_type=="mixed_user"

* datestr is a "-"-separated string; the three pieces are used as year, month, day.
split datestr, p("-")
destring datestr*, replace
rename datestr1 year
rename datestr2 month
rename datestr3 day
gen all_dates=mdy(month, day, year)

collapse (sum) num_trips sum_fare sum_miles num_violent_trips *_cash *_mixed, by(city_name all_dates year month)

* Numeric panel id assigned by egen group() over city_name.
egen city_id=group(city_name)
format all_dates %td
tsset city_id all_dates

drop if sum_fare==0
keep if (month>=8 & year==2017) | (month<=3 & year==2018)

* Keep only cities that have the largest number of remaining daily rows.
bysort city_id: gen count= _N
egen max_count=max(count)
keep if count==max_count
gen date=all_dates
order city_name city_id all_dates date

* Price: total fares divided by total miles.
gen price= sum_fare/sum_miles

* Each outcome divided by the same city's value on date 21093 (01oct2017).
foreach var in num_trips num_trips_cash sum_fare price {
	gen `var'_temp=`var' if all_dates==21093
	bysort city_id: egen `var'_max=max(`var'_temp)
	gen `var'_norm=`var'/`var'_max
	drop `var'_temp `var'_max
}
save SC_Puebla_price.dta, replace


* ---- Figure 8c: synthetic control for price ----
* Runs synth on price with unit 30 as treated and treatment period 21161, then
* plots treated vs synthetic price.
use SC_Puebla_price.dta, clear
capture erase sc_Puebla_price.dta
* Hard-coded id from egen group(city_name). NOTE(review): confirm which city is 33.
drop if city_id==33
rename city_name city
merge m:1 year city using population_temp.dta, nogenerate keep(match)
capture erase sc_Puebla_fares.dta

* Fare per trip first, then per-capita trip counts; total_fares is therefore
* fare per trip times trips per capita.
replace sum_fare=sum_fare/num_trips
replace num_trips=num_trips/population
replace num_trips_cash=num_trips_cash/population
gen total_fares=sum_fare*num_trips


* Outcome: price. Predictors: total_fares, num_trips, num_trips_cash, sum_fare,
* and price on dates 21063 and 21046. Results go to sc_Puebla_price.dta.
synth price total_fares num_trips num_trips_cash sum_fare ///
 price(21063) price(21046)   ///
, trunit(30) trperiod(21161) xperiod(21032(1)21274) nested fig keep(sc_Puebla_price)

use sc_Puebla_price.dta, clear
rename _W_Weight weight
rename _Y_treated Puebla
rename _Y_synthetic Synthetic
rename _time time
format time %td

* The next two statements leave the values unchanged.
replace Puebla=Puebla 
replace Synthetic=Synthetic 


* Vertical reference lines are drawn at 20877, 21161, 21077 and 21115.
#delimit
twoway 
(line Puebla time, lwidth(thick) lcolor(cranberry)) 
(line Synthetic time, lcolor(black) lpattern(longdash)) 
, ytitle("Price", height(5)) 
 xtitle(" ") ylabel(0.4[.2]1.4)
title(" ") xline(20877, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(15aug2017  15oct2017  15dec2017  15feb2018 15feb2018, format(%d)) 
xline(21161, lwidth(thin) lcolor(black) lpattern(dash))
xline(21077, lwidth(thin) lcolor(red) lpattern(dash))
xline(21115, lwidth(thin) lcolor(ebblue) lpattern(dash))
legend(order (1 "Puebla" 2 "Synthetic Puebla") cols(2) region(lwidth(none)))
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/Figure8c.eps", replace


* ---- Placebo synthetic controls and confidence bounds, price outcome ----
* Runs synth once per city with price as the outcome, stacks treated paths and
* donor weights, and calls the external SCMCS routine to obtain bounds.
* clear all also removes any matrices left in memory, so the accumulators
* below start empty.
clear all
set more off
use SC_Puebla_price.dta, clear
rename city_name city
merge m:1 year city using population_temp.dta, nogenerate keep(match)
capture erase sc_Puebla_price.dta

* Fare per trip first, then per-capita trip counts.
replace sum_fare=sum_fare/num_trips
replace num_trips=num_trips/population
replace num_trips_cash=num_trips_cash/population
gen total_fares=sum_fare*num_trips
tsset city_id all_dates
* Hard-coded id from egen group(city_name). NOTE(review): confirm which city is 33.
drop if city_id==33

* Remove result files left over from a previous run.
forvalues i=1/50 { 
capture erase sc_`i'_price.dta
}


levelsof city_id, local(cities) 
foreach i of local cities {
synth price total_fares num_trips num_trips_cash sum_fare price(21105) ///
 price(21033) price(21063) price(21093)  price(21124) price(21154) ///
, trunit(`i') trperiod(21161) xperiod(21032(1)21274) nested fig keep(sc_`i'_price)
	* Append this run's treated path as a new column of Ymat.
	matrix temp1 = e(Y_treated)
	matrix Ymat = (nullmat(Ymat), temp1)
	* Second column of e(W_weights) is appended as a new column of weightsmat.
	matrix temp1 = e(W_weights)
	matrix temp2 = temp1[1...,2]
	matrix weightsmat = (nullmat(weightsmat), temp2)

        }
* Row vector of zeros with one entry per placebo run; passed to SCMCS below.
matrix v = J(1, colsof(Ymat), 0)

* Print the dimensions of Ymat and v.
local dim (`= rowsof(Ymat)',`=colsof(Ymat)') 
di "`dim'"
local dim (`= rowsof(v)',`=colsof(v)') 
di "`dim'"

* SCMCS is defined in the external do-file, which is re-run on every pass.
* NOTE(review): meaning of the numeric SCMCS arguments is not visible here.
forvalues j=1/3 {
do "function_SCM-CS_v07_stata.do"
SCMCS Ymat weightsmat 30 130 0 v 30 "constant"  `j'/26
svmat results
keep results1 results2
rename results1 upper_bound`j'
rename results2 lower_bound`j'
* Row counter used later as the merge key with the synth output file.
gen n=_n
save SC_bounds`j'_price.dta, replace
}

* ---- Figure 8d: percent gap in price with SCMCS bounds ----
* Uses the placebo-loop output for unit 30 and the three price bound files,
* expresses the gap and bounds relative to the treated unit's mean before 21161.
use sc_30_price, clear
* Row counter; merge key for the bound files.
gen n=_n
* Counts of rows before and from date 21161 (not used later in this block).
gen T0_temp=1 if _time<21161
egen T0=sum(T0_temp)
gen T_temp=1 if _time>=21161
egen T=sum(T_temp)

rename _Y_synthetic Synthetic
rename _Y_treated Puebla
rename _W_Weight weight
rename _Co_Number co
* The next two statements leave the values unchanged.
replace Puebla=Puebla
replace Synthetic=Synthetic
gen diff=Puebla-Synthetic
gen temp_mean=Puebla if _time<21161
egen mean=mean(temp_mean) 
drop temp_mean
gen pct_difference=diff/mean
rename _time time
format time %td
merge 1:1 n using SC_bounds1_price.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds2_price.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds3_price.dta, keep(match) nogenerate

* mean and pct_difference are dropped and rebuilt with the same definitions.
capture drop mean
capture drop difference
capture drop pct_difference
capture drop temp
gen difference = Puebla - Synthetic
gen temp=Puebla
replace temp=. if time>=21161
egen mean=mean(temp) 
gen pct_difference=difference/mean

* Bounds are put on the same relative scale as pct_difference.
foreach var in upper_bound1 lower_bound1 upper_bound2 lower_bound2 upper_bound3 lower_bound3 {
replace `var'=`var'/mean
}

* Only the first pair of bounds is plotted.
#delimit
twoway 
(line upper_bound1 time, lcolor(gs4)  lpattern(dash)) 
(line lower_bound1 time, lcolor(gs4)  lpattern(dash)) 
(line pct_difference time, lwidth(thick) lcolor(cranberry)) 

, ytitle("Pct Diff. Price", height(5)) 
 xtitle(" ") ylabel(-.4[.2].4)
title(" ") xline(20877, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(15aug2017  15oct2017  15dec2017  15feb2018 15feb2018, format(%d)) 
xline(21161, lwidth(thin) lcolor(black) lpattern(dash))
legend(off)
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/Figure8d.eps", replace



* ---- Launch weeks by city (TEMP_launch_dates.dta) ----
* Builds each city's introduction date, appends rows for Queretaro and Estado
* de Mexico at fixed positions 44 and 45, and converts the date to the Monday
* of its week.
clear all
cd ""
use "launch_dates_clean.dta", clear
gen intro_date = mdy(intro_month, intro_day, intro_year)
rename city city_name
keep city_name same intro_date

* The dataset is extended to exactly 45 rows; rows 44 and 45 are then filled in.
* Queretaro keeps a missing intro_date; Estado de Mexico gets date 20781.
set obs 45
replace city_name = "Queretaro" in 44
replace city_name = "Estado de Mexico" in 45
replace same = 0 in 44/45
replace intro_date = 20781 in 45

* Days elapsed since Monday: 0 for Monday through 6 for Sunday.
gen dow = mod(dow(intro_date) + 6, 7)
gen intro_week = intro_date - dow
drop intro_date
format intro_week %td
save "TEMP_launch_dates.dta", replace

* ---- Driver sign-ups keyed by city and week (Driver_SignUps.dta) ----
* The date column is split on a space and its first piece on "-"; the three
* resulting pieces are used as year, month, day. Rows are not aggregated here.
import delimited "Driver_SignUps.csv", encoding(ISO-8859-1) clear
split date, parse(" ")
split date1, parse("-")
destring date1*, replace
rename (date11 date12 date13) (year month day)
keep city_name su year month day

gen all_dates = mdy(month, day, year)
* Monday of the week containing all_dates.
gen week = all_dates - mod(dow(all_dates) + 6, 7)
format week %td

keep city_name su week
rename (city_name su) (city driver_su)
save "Driver_SignUps.dta", replace


* ---- ETA and driver supply keyed by city and week ----
* The incoming week column is a "-"-separated string; it is parsed, replaced
* by the Monday of that week, and both measures are divided by 60.
import delimited "ETA_and_Supply_hours.csv", encoding(ISO-8859-1) clear
split week, parse("-")
destring week*, replace
rename (week1 week2 week3) (year month day)

* Dropping the original string column frees the name week for the date below.
keep city_name minutes_per_driver average_eta_seconds year month day
gen all_dates = mdy(month, day, year)
gen week = all_dates - mod(dow(all_dates) + 6, 7)
format week %td
keep city_name minutes_per_driver average_eta_seconds week
rename city_name city

* force turns non-numeric entries of average_eta_seconds into missing values.
destring average_eta_seconds, replace force
replace average_eta_seconds = average_eta_seconds/60
rename average_eta_seconds avg_eta_min
replace minutes_per_driver = minutes_per_driver/60
rename minutes_per_driver driver_hours
save "ETA_and_Supply_hours.dta", replace

* ---- Weekly rider and driver cancellations by city (Cancellation.dta) ----
import delimited "$dirobs/obs_data2.csv", encoding(ISO-8859-1) clear

* datestr is a "-"-separated string; the three pieces are used as year, month, day.
split datestr, parse("-")
destring datestr*, replace
rename (datestr1 datestr2 datestr3) (year month day)
gen all_dates = mdy(month, day, year)

* Monday of the week containing all_dates.
gen week = all_dates - mod(dow(all_dates) + 6, 7)
format week %td

rename city_name city
collapse (sum) num_rider_canceled num_driver_canceled, by(city week)
save "Cancellation.dta", replace


* ---- Sign-ups keyed by city and week (SignUps.dta) ----
* The date column is split on a space and its first piece on "-"; the three
* resulting pieces are used as year, month, day. Rows are not aggregated here.
import delimited "SignUps.csv", encoding(ISO-8859-1) clear
split date, parse(" ")
split date1, parse("-")
destring date1*, replace
rename (date11 date12 date13) (year month day)
keep city_name su year month day

gen all_dates = mdy(month, day, year)
* Monday of the week containing all_dates.
gen week = all_dates - mod(dow(all_dates) + 6, 7)
format week %td

keep city_name su week
rename city_name city
save "SignUps.dta", replace

* ---- Weekly sums of all num* and sum* measures by city ----
import delimited "$dirobs/obs_data2.csv", encoding(ISO-8859-1) clear

* datestr is a "-"-separated string; the three pieces are used as year, month, day.
split datestr, parse("-")
destring datestr*, replace
rename (datestr1 datestr2 datestr3) (year month day)
gen all_dates = mdy(month, day, year)

* Monday of the week containing all_dates.
gen week = all_dates - mod(dow(all_dates) + 6, 7)

replace num_violent_trips = 0 if num_violent_trips==.
rename city_name city
collapse (sum) num* sum*, by(city week)
save "$dirobs/Sessions_Requests.dta", replace

* ---- Weekly city panel with supply-side measures (SC_Puebla_other.dta) ----
* Starts from the active-drivers workbook, attaches launch weeks and monthly
* city covariates, aggregates to city-week, merges in the weekly files built
* above, and keeps a balanced Aug 2017 - Mar 2018 panel.
import excel "Active Drivers Mexico.xlsx", sheet("Sheet1") firstrow clear

* Fix: the exclusions previously referred to the column as -city-, which only
* resolves through variable-name abbreviation of city_name (the column is
* renamed to city further down). The full name is used so the line does not
* depend on abbreviation being enabled and unambiguous.
drop if inlist(city_name, "Veracruz", "Riviera Maya", "Pachuca", "Durango", "Colima")
merge m:1 city_name using "TEMP_launch_dates.dta", nogenerate
erase "TEMP_launch_dates.dta"
gen month=month(week)
gen year=year(week)

* Spread each city's launch week to all of its rows.
bysort city_name: egen intro_date=max(intro_week)
format intro_date week %td

rename city_name city
merge m:1 city year month using ciudad_estado_mensual_short, keep(match) nogenerate

collapse (sum) active* total* cash* (mean) e_rate inc_pc, by(city week intro_date month year)

* Each of the following merges keeps master rows only (matched or not).
* NOTE(review): SignUps.dta and Driver_SignUps.dta are saved without being
* collapsed; the 1:1 merges require them to be unique by city and week.
merge 1:1 week city using "SignUps.dta"
drop if _merge==2
drop _merge

merge 1:1 week city using "Driver_SignUps.dta"
drop if _merge==2
drop _merge

merge 1:1 week city using "ETA_and_Supply_hours.dta"
drop if _merge==2
drop _merge

merge 1:1 week city using "Cancellation.dta"
drop if _merge==2
drop _merge

merge 1:1 city week using precipitation_weekly.dta
drop if _merge==2
drop _merge

merge 1:1 city week using "$dirobs/Sessions_Requests.dta"
drop if _merge==2
drop _merge

drop if  total_fares_local==0

* Numeric panel id and a consecutive week index.
egen city_id=group(city)
format week %td
egen time=group(week)
tsset city_id time

drop if sum_fare==0
keep if (month>=8 & year==2017) | (month<=3 & year==2018)

* Keep only cities that have the largest number of remaining weekly rows.
bysort city_id: gen count= _N
egen max_count=max(count)
keep if count==max_count

* Price: total fares divided by total miles.
gen price= sum_fare/sum_miles
save SC_Puebla_other.dta, replace


* ---- Synthetic control for average ETA (weekly panel), single run ----
* Treated unit is city_id 24 and the treatment period is week 21157; results
* are written to sc_other.dta.
use SC_Puebla_other.dta, clear
tsset city_id week
merge m:1 year city using "$dirobs/population_temp.dta", keep(match) nogenerate

* Cash trips per capita come from cash_trips; fare per trip is formed before
* num_trips is rescaled to a per-capita value.
gen num_trips_cash = cash_trips/population
replace sum_fare = sum_fare/num_trips
replace num_trips = num_trips/population
gen total_fares = sum_fare*num_trips

synth avg_eta_min total_fares num_trips num_trips_cash sum_fare price ///
	avg_eta_min(21059) avg_eta_min(21087) avg_eta_min(21122) avg_eta_min(21150), ///
	trunit(24) trperiod(21157) xperiod(21038(7)21269) nested fig keep(sc_other) replace

* ---- Placebo synthetic controls and confidence bounds, average ETA ----
* Runs synth once per city on the weekly panel with avg_eta_min as the outcome,
* stacks treated paths and donor weights, and calls the external SCMCS routine.
* clear all also removes any matrices left in memory, so the accumulators
* below start empty.
clear all
set more off
use SC_Puebla_other.dta, clear
tsset city_id week
merge m:1 year city using "$dirobs/population_temp.dta", nogenerate keep(match)
* Fare per trip first, then per-capita trip counts.
replace sum_fare=sum_fare/num_trips
replace num_trips=num_trips/population
gen num_trips_cash=cash_trips/population
gen total_fares=sum_fare*num_trips

* Hard-coded id from egen group(city). NOTE(review): confirm which city is 15.
drop if city_id==15

* Remove result files left over from a previous run.
forvalues i=1/26 { 
capture erase sc_`i'_other.dta
}

levelsof city_id, local(cities) 
foreach i of local cities {

synth avg_eta_min total_fares num_trips num_trips_cash sum_fare price ///
 avg_eta_min(21059) avg_eta_min(21087) avg_eta_min(21122) avg_eta_min(21150)  ///
, trunit(`i') trperiod(21157) xperiod(21038(7)21269) nested  keep(sc_`i'_other) replace
	* Append this run's treated path as a new column of Ymat.
	matrix temp1 = e(Y_treated)
	matrix Ymat = (nullmat(Ymat), temp1)
	* Second column of e(W_weights) is appended as a new column of weightsmat.
	matrix temp1 = e(W_weights)
	matrix temp2 = temp1[1...,2]
	matrix weightsmat = (nullmat(weightsmat), temp2)

        }

* Row vector of zeros with one entry per placebo run; passed to SCMCS below.
matrix v = J(1, colsof(Ymat), 0)
* Print the dimensions of Ymat and v.
local dim (`= rowsof(Ymat)',`=colsof(Ymat)') 
di "`dim'"
local dim (`= rowsof(v)',`=colsof(v)') 
di "`dim'"

* SCMCS is defined in the external do-file, which is re-run on every pass.
* NOTE(review): city_id 15 was dropped above, so ids above 15 sit one column
* earlier in Ymat; confirm whether the first numeric argument (24) is meant
* to be a column position or a city id.
forvalues j=1/3 {
do "function_SCM-CS_v07_stata.do"
SCMCS Ymat weightsmat 24 18 0 v 30 "constant"  `j'/23
svmat results
keep results1 results2
rename results1 upper_bound`j'
rename results2 lower_bound`j'
* Row counter used later as the merge key with the synth output file.
gen n=_n
save SC_bounds`j'_other.dta, replace
}

* ---- Figure D2a: percent gap in average ETA with SCMCS bounds ----
* Uses the placebo-loop output for unit 24 (outcome avg_eta_min) and the three
* bound files, and expresses the gap and the bounds relative to the treated
* unit's mean over rows dated before 21161.
* Fix: the y-axis title read "Pct Diff. Price", carried over from the price
* figure, although the series saved in sc_24_other comes from the synth run
* whose outcome is avg_eta_min.
* Cleanup: the gap, mean and percent gap were computed, dropped and recomputed
* with identical definitions; they are now computed once.
* NOTE(review): the weekly synth runs use treatment period 21157 while the
* pre-period mean uses dates before 21161, so week 21157 enters the mean.
use sc_24_other, clear
* Row counter; merge key for the bound files.
gen n=_n
* Counts of rows before and from date 21161 (not used later in this block).
gen T0_temp=1 if _time<21161
egen T0=sum(T0_temp)
gen T_temp=1 if _time>=21161
egen T=sum(T_temp)

rename _Y_synthetic Synthetic
rename _Y_treated Puebla
rename _W_Weight weight
rename _Co_Number co
gen diff=Puebla-Synthetic
rename _time time
format time %td
merge 1:1 n using SC_bounds1_other.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds2_other.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds3_other.dta, keep(match) nogenerate

* Gap relative to the treated unit's pre-period mean.
gen difference = Puebla - Synthetic
gen temp=Puebla
replace temp=. if time>=21161
egen mean=mean(temp)
gen pct_difference=difference/mean

* Bounds are put on the same relative scale as pct_difference.
foreach var in upper_bound1 lower_bound1 upper_bound2 lower_bound2 upper_bound3 lower_bound3 {
	replace `var'=`var'/mean
}

* Only the first pair of bounds is plotted.
#delimit
twoway 
(line upper_bound1 time, lcolor(gs4)  lpattern(dash)) 
(line lower_bound1 time, lcolor(gs4)  lpattern(dash)) 
(line pct_difference time, lwidth(thick) lcolor(cranberry)) 
, ytitle("Pct Diff. Avg. ETA", height(5)) 
 xtitle(" ") ylabel(-.6[.2].6)
title(" ") xline(20877, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(15aug2017  15oct2017  15dec2017  15feb2018 15feb2018, format(%d)) 
xline(21161, lwidth(thin) lcolor(black) lpattern(dash))
legend(off)
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/FigureD2a.eps", replace


 


* ---- Setup for the price-index (INPC) analysis ----
* Copies the monthly city covariate file into the input folder and builds a
* monthly launch-date file there. Paths are blank and must be filled in.
clear all
set more off
cd ""
global dirin ""

* Load from the current folder, then save a copy under the input folder.
use ciudad_estado_mensual_short.dta, clear
cd "$dirin"
save ciudad_estado_mensual_short.dta, replace

cd ""
use "launch_dates_clean.dta", clear
* Monthly introduction date built from year and month.
gen intro_date = ym(intro_year, intro_month)
rename city city_name
keep city_name same intro_date

* The dataset is extended to exactly 45 rows; rows 44 and 45 are then filled in.
* Queretaro keeps a missing intro_date; Estado de Mexico gets monthly date 682.
set obs 45
replace city_name = "Queretaro" in 44
replace city_name = "Estado de Mexico" in 45
replace same = 0 in 44/45
replace intro_date = 682 in 45

cd "$dirin"
save "TEMP_launch_dates.dta", replace


* ---- Monthly price-index panel (Puebla_INPC.dta) ----
* Attaches monthly city covariates to the transport and gasoline index file,
* keeps months from 658 onward, and declares the panel by id and all_dates.
use INPC_transport_gasoline.dta, clear

* Rename back and forth, as in the original script; the net name is city.
rename city city_name
rename city_name city

* Keep every master row, matched or not; rows found only in the using file are discarded.
merge m:1 city year month using ciudad_estado_mensual_short, keep(master match) nogenerate

gen d = all_dates
order d all_dates
drop if all_dates<658
drop city

tsset id all_dates
capture erase sc_Puebla_INPC.dta
save Puebla_INPC.dta, replace


* ---- Placebo synthetic controls and confidence bounds, taxi price index ----
* Runs synth once per id on the monthly panel with inflationtaxi as the
* outcome and treatment period 695, stacks treated paths and donor weights,
* and calls the external SCMCS routine.
* clear all also removes any matrices left in memory, so the accumulators
* below start empty.
clear all
set more off
use Puebla_INPC.dta, clear
  
tsset id all_dates
* Remove result files left over from a previous run.
forvalues i=1/14 { 
capture erase sc_`i'_INPC.dta
}

levelsof id, local(cities) 
foreach i of local cities {
synth inflationtaxi inflationgasoline e_rate inc_pc ///
inflationtaxi(660) inflationtaxi(670) inflationtaxi(680) inflationtaxi(690) ///
, trunit(`i') trperiod(695) xperiod(658(1)701) nested fig keep(sc_`i'_INPC)
	* Append this run's treated path as a new column of Ymat.
	matrix temp1 = e(Y_treated)
	matrix Ymat = (nullmat(Ymat), temp1)
	* Second column of e(W_weights) is appended as a new column of weightsmat.
	matrix temp1 = e(W_weights)
	matrix temp2 = temp1[1...,2]
	matrix weightsmat = (nullmat(weightsmat), temp2)

        }

* Row vector of zeros with one entry per placebo run; passed to SCMCS below.
matrix v = J(1, colsof(Ymat), 0)

* Print the dimensions of Ymat and v.
local dim (`= rowsof(Ymat)',`=colsof(Ymat)') 
di "`dim'"
local dim (`= rowsof(v)',`=colsof(v)') 
di "`dim'"

* SCMCS is defined in the external do-file, which is re-run on every pass.
* NOTE(review): the output names SC_bounds1-3.dta are the same as those saved
* by the trips placebo section; if both run in the same folder these files
* overwrite the earlier ones.
forvalues j=1/3 {
do "function_SCM-CS_v07_stata.do"
SCMCS Ymat weightsmat 10 37 0 v 30 "constant"  `j'/14
svmat results
keep results1 results2
rename results1 upper_bound`j'
rename results2 lower_bound`j'
* Row counter used later as the merge key with the synth output file.
gen n=_n
save SC_bounds`j'.dta, replace
}

* ---- Figure D2b: gap in the taxi price index with SCMCS bounds ----
* Uses the placebo-loop output for id 10 and the three bound files.
use sc_10_INPC, clear
* Row counter; merge key for the bound files.
gen n=_n
* Counts of rows before and from month 695 (not used later in this block).
gen T0_temp=1 if _time<695
egen T0=sum(T0_temp)
gen T_temp=1 if _time>=695
egen T=sum(T_temp)

rename _Y_synthetic Synthetic
rename _Y_treated Puebla
rename _W_Weight weight
rename _Co_Number co
* The next two statements leave the values unchanged.
replace Puebla=Puebla
replace Synthetic=Synthetic
gen diff=Puebla-Synthetic
gen temp_mean=Puebla if _time<695
egen mean=mean(temp_mean) 
drop temp_mean
gen pct_difference=diff/mean
rename _time time
format time %tm
merge 1:1 n using SC_bounds1.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds2.dta, keep(match) nogenerate
merge 1:1 n using SC_bounds3.dta, keep(match) nogenerate

* NOTE(review): unlike the other gap figures, this plot draws diff (the raw
* gap) rather than pct_difference, and the bounds are not divided by the
* pre-period mean, while the axis title says "Pct Diff."; confirm that the
* outcome is already expressed in percent terms.
#delimit
twoway 
(line diff time, lwidth(thick) lcolor(cranberry)) 
(line upper_bound1 time, lcolor(gs4)  lpattern(dash)) 
(line lower_bound1 time, lcolor(gs4)  lpattern(dash)) 
, ytitle("Pct Diff. Price Taxis", height(5)) 
 xtitle(" ") 
title(" ") xline(695, lwidth(thin) lcolor(black) lpattern(dash))
tlabel(2015m6  2016m6  2017m6  2018m6, format(%tm)) 
legend(off) ylabel(-.2[.1].2)
graphregion(color(white)) plotregion(fcolor(white));
#delimit cr	
graph export "$dirout/FigureD2b.eps", replace

